package spark.example

import org.apache.spark.{SparkConf,SparkContext}

// NOTE(review): object name "slefMergeFile" looks like a typo of "selfMergeFile",
// but it is the entry-point class passed to spark-submit, so it is kept unchanged.
/**
 * Spark job that merges a directory of small text files into a single output file.
 *
 * Usage: spark-submit --class spark.example.slefMergeFile <jar> <inputPath> <outputPath>
 *
 * For each input file it keeps only CJK Unified Ideographs (U+4E00..U+9FA5),
 * collapsing every other character run into a single space, then prefixes each
 * file's text with a line id ("1000" + zero-based file index, tab-separated)
 * and writes everything into one output partition.
 */
object slefMergeFile {
    def main(args: Array[String]): Unit = {
        // Fail fast with a clear message instead of ArrayIndexOutOfBoundsException.
        require(args.length >= 2, "usage: slefMergeFile <inputPath> <outputPath>")
        val input = args(0)
        val output = args(1)

        val conf = new SparkConf().setAppName("mergeFile1")
        val sc = new SparkContext(conf)
        try {
            sc.wholeTextFiles(input)          // (filePath, fileContent) pairs, one per file
              .map(_._2)                      // keep only the file content
              // Replace every run of non-Chinese characters with a single space.
              .map(_.replaceAll("[^\u4e00-\u9fa5]+", " "))
              .zipWithIndex()                 // attach a stable per-file index
              // Id each record as 1000 + index so ids sort lexicographically.
              .map { case (text, idx) => (1000 + idx).toString + "\t" + text }
              // repartition (not coalesce) keeps upstream stages parallel and
              // shuffles down to a single output file at the end.
              .repartition(1)
              .saveAsTextFile(output)
        } finally {
            // Always release the context, even if the job fails.
            sc.stop()
        }
    }
}
